ss2_glia part

load all dependancies

source("analysis.r")

load seurat object from ss2_neur/glia sub-clustering results in previous page
because ss2_glia has only 3 sub-clusters, not as complicated as ss2_neur's, first to do ss2_glia

ss2_glia.seur <- readRDS("ss2_glia.seur.rds")

before differeital analysis

before differential analysis, ..., in fact, did that directly more than once then found sth. interesting,
ss2_Glia_1, no matter inpaper or inlocal(99% matched anyway), may further contain at least two distinct sub-clusters,
just as that one of the three marker genes shows the difference (though they are not that unique).

multiplot(
  DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
  DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
  FeaturePlot(ss2_glia.seur, c("Slc18a2")) + coord_fixed(0.88), cols = 1
)

could also find a few unique (not 100% clean, just visually obvious) identifiers on the top empty part of Glia_1,
like "Rxrg","Xylt1","Pappa" and so on.

multiplot(
  FeaturePlot(ss2_glia.seur, c("Rxrg")) + coord_fixed(0.88),
  FeaturePlot(ss2_glia.seur, c("Xylt1")) + coord_fixed(0.88),
  FeaturePlot(ss2_glia.seur, c("Pappa")) + coord_fixed(0.88), cols = 1
)

and the nGene distribution also get a difference.

DimPlot(ss2_glia.seur, group.by = 'nGene', cols = material.heat(1909)) + labs(title = "Number of genes")  + 
             guides(colour = guide_coloursteps(show.limits = TRUE,label=FALSE)) + coord_fixed(0.88)

sorry for still unavailable to add scale mark on the colorbar because of the conflicts of data formats inside.
lightblue should be >3500, and orange still <6000

ss2_glia's nGene distribution

cat("nGene quantile statistics of ss2_glia: \n" ); quantile(as.numeric(ss2_glia.seur@meta.data$nGene), probs=seq(0,1,0.05))
## nGene quantile statistics of ss2_glia:
##     0%     5%    10%    15%    20%    25%    30%    35%    40%    45%    50% 
##  566.0 3321.1 3663.8 3944.0 4123.6 4294.5 4412.4 4533.6 4643.2 4739.1 4842.0 
##    55%    60%    65%    70%    75%    80%    85%    90%    95%   100% 
## 4930.0 5021.8 5115.4 5211.6 5315.5 5421.0 5550.0 5718.0 5936.0 8069.0
h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene), breaks = 20, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)

d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))

title(paste0("nGene distribtusion for Glia_1(red), Glia_2(green), Glia_3(blue)"))

lines(x = d1$x, y=d1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))* diff(h1$breaks)[1],
      col="red1",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))* diff(h2$breaks)[1],
      col="green3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))* diff(h3$breaks)[1],
      col="blue3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1","Glia_2","Glia_3"),lty = 1,lwd=2,
       col=c("red","green","blue"))

is this nGene distribution a cause to drive the clustering or it's just somehow related but not the main reason ?
try to sub-cluster Glia_1.

comparing to complicated sub-clustering results of ss2_neur's:
level 1, into Chat+ and Nos1+,
level 2, into PEMNs/PIMNs/PINs/PSNs/PSVNs,
level 3, each again into 2-7 sub-clusters,

ss2_glia just partition into three parts roughly with big 'k'NN and no further biological things to talk about.
maybe it's just because ENS glia lack of existing knowledge, on the other hand,
indeed it's less complicated for having less varible PCs.

sub-clustering of Glia_1

batch info

cat("inlocal ss2_Glia_1 in each Mouse: \n");
## inlocal ss2_Glia_1 in each Mouse:
table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)[paste0("M",1:29)]
## 
##   M1   M2   M3   M4   M5   M6   M7   M8   M9  M10  M11  M12  M13 <NA>  M15  M16 
##    7    9    4   13   16   13  103   66   67   64   65   74   62         4   92 
##  M17  M18  M19  M20  M21  M22  M23  M24  M25  M26  M27  M28  M29 
##    1   57    1   37   59    4   81    2    1   27   48   40   57
min_cells <- 4
cat("ss2_Glia_1 mice: ",length(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)),
    "\nss2_mix count: ",sum(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID)),
    paste0("\n\nmin_cell < ",min_cells," : ",
           "\n    mice ",sum(table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID) < min_cells),
           "\n    count ",sum((table((ss2_glia.seur@meta.data %>% 
                               filter(inlocal == "Glia_1"))$Mouse_ID))[table((ss2_glia.seur@meta.data %>% 
                                    filter(inlocal == "Glia_1"))$Mouse_ID) < min_cells])))
## ss2_Glia_1 mice:  28 
## ss2_mix count:  1074 
## 
## min_cell < 4 : 
##     mice 4
##     count 5

sub-clustering test

# 
batch_use.Glia_1 <- (ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID
names(batch_use.Glia_1) <- rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))
batch_use.Glia_1 <- factor(batch_use.Glia_1)

#
#ss2_Glia_1.seur.test <- run_analysis(name='ss2_Glia_1.test',
#                               counts=ss2_glia.seur@assays[['RNA']]@counts[,rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))],
#                               do.batch=TRUE,
#                               batch.use=batch_use.Glia_1,
#                               min_cells=4,
#                               var_regex='^Cd|^Ig|^Rn|^mt|^Rp',
#                               num_pcs=9,
#                               clust_pcs=6,
#                               k=100)
# same issue should concern  
#ss2_Glia_1.seur.test$orig.ident <- factor("ss2_Glia_1.test")
#ss2_Glia_1.seur.test@active.ident <- factor("ss2_Glia_1.test")
#saveRDS(ss2_Glia_1.seur.test,"ss2_Glia_1.seur.rds")
ss2_Glia_1.seur.test <- readRDS("ss2_Glia_1.seur.rds")

Check PCA and tsne

PCs 1-6 should be fine

ElbowPlot(ss2_Glia_1.seur.test,ndims = 9)

PCs

DimPlot(ss2_Glia_1.seur.test, reduction = 'pca', dims = c(1,2))

DimPlot(ss2_Glia_1.seur.test, reduction = 'pca', dims = c(1,3))

DimPlot(ss2_Glia_1.seur.test, reduction = 'tsne')

how did k100 do to cluster Glia_1,
might be a little more to mixd one or two but not so sure about that

DimPlot(ss2_Glia_1.seur.test, group.by = 'phenograph.k100')

well then try to range a series of 'k'NNs, k=20 to 300,
for convinient storage of variables, use a 'List' object to store all the results

# 
#batch_use.Glia_1 <- (ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Mouse_ID
#names(batch_use.Glia_1) <- rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))
#batch_use.Glia_1 <- factor(batch_use.Glia_1)

#
#ss2_Glia_1.seur.range <- list(NULL)

#for(kkk in 20*(1:15)){
#  ss2_Glia_1.seur.test <- run_analysis(name='ss2_Glia_1.test',
#                               counts=ss2_glia.seur@assays[['RNA']]@counts[,rownames(ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))],
#                               do.batch=TRUE,
#                               batch.use=batch_use.Glia_1,
#                               min_cells=4,
#                               var_regex='^Cd|^Ig|^Rn|^mt|^Rp',
#                               num_pcs=9,
#                               clust_pcs=6,
#                               k=kkk)      
  # same issue should concern  
#  ss2_Glia_1.seur.test$orig.ident <- factor("ss2_Glia_1.test")
#  ss2_Glia_1.seur.test@active.ident <- factor("ss2_Glia_1.test")
#  
#  ss2_Glia_1.seur.range <- c(ss2_Glia_1.seur.range, ss2_Glia_1.seur.test)
#}
#rm(ss2_Glia_1.seur.test)


#
#saveRDS(ss2_Glia_1.seur.range, "ss2_Glia_1.seur.range.rds")
ss2_Glia_1.seur.range <- readRDS("ss2_Glia_1.seur.range.rds")

k=20:300 in 'List' '[[2]]' - '[[16]]'

ss2_Glia_1.seur.range
## [[1]]
## NULL
## 
## [[2]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[3]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1467 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[4]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[5]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1467 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[6]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[7]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[8]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1466 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[9]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1469 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[10]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[11]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1466 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[12]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1467 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[13]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1466 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[14]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1467 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[15]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1468 variable features)
##  2 dimensional reductions calculated: pca, tsne
## 
## [[16]]
## An object of class Seurat 
## 18482 features across 1074 samples within 1 assay 
## Active assay: RNA (18482 features, 1467 variable features)
##  2 dimensional reductions calculated: pca, tsne

how about 'k'NN 300-20 look like

multiplot(
  DimPlot(ss2_Glia_1.seur.range[[16]], group.by = 'phenograph.k300') + labs(title="k300"),
  DimPlot(ss2_Glia_1.seur.range[[15]], group.by = 'phenograph.k280') + labs(title="k280"),
  DimPlot(ss2_Glia_1.seur.range[[14]], group.by = 'phenograph.k260') + labs(title="k260"),
  DimPlot(ss2_Glia_1.seur.range[[13]], group.by = 'phenograph.k240') + labs(title="k240"),
  DimPlot(ss2_Glia_1.seur.range[[12]], group.by = 'phenograph.k220') + labs(title="k220"),
DimPlot(ss2_Glia_1.seur.range[[11]], group.by = 'phenograph.k200') + labs(title="k200"),
DimPlot(ss2_Glia_1.seur.range[[10]], group.by = 'phenograph.k180') + labs(title="k180"),
DimPlot(ss2_Glia_1.seur.range[[9]], group.by = 'phenograph.k160') + labs(title="k160"),
DimPlot(ss2_Glia_1.seur.range[[8]], group.by = 'phenograph.k140') + labs(title="k140"),
DimPlot(ss2_Glia_1.seur.range[[7]], group.by = 'phenograph.k120') + labs(title="k120"),
DimPlot(ss2_Glia_1.seur.range[[6]], group.by = 'phenograph.k100') + labs(title="k100"),
DimPlot(ss2_Glia_1.seur.range[[5]], group.by = 'phenograph.k80') + labs(title="k80"),
DimPlot(ss2_Glia_1.seur.range[[4]], group.by = 'phenograph.k60') + labs(title="k60"),
DimPlot(ss2_Glia_1.seur.range[[3]], group.by = 'phenograph.k40') + labs(title="k40"),
DimPlot(ss2_Glia_1.seur.range[[2]], group.by = 'phenograph.k20') + labs(title="k20"),
cols = 5)

plot them back to ss2_glia
add meta.data

ss2_glia.seur@meta.data$K300 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[16]]@meta.data),]$K300 <- ss2_Glia_1.seur.range[[16]]@meta.data$phenograph.k300
ss2_glia.seur@meta.data$K280 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[15]]@meta.data),]$K280 <- ss2_Glia_1.seur.range[[15]]@meta.data$phenograph.k280
ss2_glia.seur@meta.data$K260 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[14]]@meta.data),]$K260 <- ss2_Glia_1.seur.range[[14]]@meta.data$phenograph.k260
ss2_glia.seur@meta.data$K240 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[13]]@meta.data),]$K240 <- ss2_Glia_1.seur.range[[13]]@meta.data$phenograph.k240
ss2_glia.seur@meta.data$K220 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[12]]@meta.data),]$K220 <- ss2_Glia_1.seur.range[[12]]@meta.data$phenograph.k220

ss2_glia.seur@meta.data$K200 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[11]]@meta.data),]$K200 <- ss2_Glia_1.seur.range[[11]]@meta.data$phenograph.k200
ss2_glia.seur@meta.data$K180 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[10]]@meta.data),]$K180 <- ss2_Glia_1.seur.range[[10]]@meta.data$phenograph.k180
ss2_glia.seur@meta.data$K160 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[9]]@meta.data),]$K160 <- ss2_Glia_1.seur.range[[9]]@meta.data$phenograph.k160
ss2_glia.seur@meta.data$K140 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[8]]@meta.data),]$K140 <- ss2_Glia_1.seur.range[[8]]@meta.data$phenograph.k140
ss2_glia.seur@meta.data$K120 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[7]]@meta.data),]$K120 <- ss2_Glia_1.seur.range[[7]]@meta.data$phenograph.k120
ss2_glia.seur@meta.data$K100 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[6]]@meta.data),]$K100 <- ss2_Glia_1.seur.range[[6]]@meta.data$phenograph.k100
ss2_glia.seur@meta.data$K80 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[5]]@meta.data),]$K80 <- ss2_Glia_1.seur.range[[5]]@meta.data$phenograph.k80
ss2_glia.seur@meta.data$K60 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[4]]@meta.data),]$K60 <- ss2_Glia_1.seur.range[[4]]@meta.data$phenograph.k60
ss2_glia.seur@meta.data$K40 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[3]]@meta.data),]$K40 <- ss2_Glia_1.seur.range[[3]]@meta.data$phenograph.k40
ss2_glia.seur@meta.data$K20 <- ss2_glia.seur@meta.data$inlocal
ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[2]]@meta.data),]$K20 <- ss2_Glia_1.seur.range[[2]]@meta.data$phenograph.k20

plot with old Glia_2/3

multiplot(
  DimPlot(ss2_glia.seur, group.by = 'K300') + labs(title="K300"),
  DimPlot(ss2_glia.seur, group.by = 'K280') + labs(title="K280"),
  DimPlot(ss2_glia.seur, group.by = 'K260') + labs(title="K260"),
  DimPlot(ss2_glia.seur, group.by = 'K240') + labs(title="K240"),
  DimPlot(ss2_glia.seur, group.by = 'K220') + labs(title="K220"),
DimPlot(ss2_glia.seur, group.by = 'K200') + labs(title="K200"),
DimPlot(ss2_glia.seur, group.by = 'K180') + labs(title="K180"),
DimPlot(ss2_glia.seur, group.by = 'K160') + labs(title="K160"),
DimPlot(ss2_glia.seur, group.by = 'K140') + labs(title="K140"),
DimPlot(ss2_glia.seur, group.by = 'K120') + labs(title="K120"),
DimPlot(ss2_glia.seur, group.by = 'K100') + labs(title="K100"),
DimPlot(ss2_glia.seur, group.by = 'K80') + labs(title="K80"),
DimPlot(ss2_glia.seur, group.by = 'K60') + labs(title="K60"),
DimPlot(ss2_glia.seur, group.by = 'K40') + labs(title="K40"),
DimPlot(ss2_glia.seur, group.by = 'K20') + labs(title="K20"),
cols=5
)

visually, clustering into 2-4 sub-clusters could be fine,
but genes found upstairs tell us more possible here to choose K300 or K280 with nearly the same result
(next if find DEGs that would get more clusters, back here to use other 'K's)

let's call "0" Glia_1.0 and "1" Glia_1.1

#ss2_glia.seur <- readRDS("ss2_glia.seur.rds")
#ss2_glia.seur@meta.data$K300 <- ss2_glia.seur@meta.data$inlocal
#ss2_glia.seur@meta.data[rownames(ss2_Glia_1.seur.range[[16]]@meta.data),]$K300 <- ss2_Glia_1.seur.range[[16]]@meta.data$phenograph.k300

#ss2_glia.seur@meta.data$K300[ss2_glia.seur@meta.data$K300=="0"] <- "Glia_1.0"
#ss2_glia.seur@meta.data$K300[ss2_glia.seur@meta.data$K300=="1"] <- "Glia_1.1"
#saveRDS(ss2_glia.seur,"ss2_glia_new.seur.rds")
ss2_glia.seur <- readRDS("ss2_glia_new.seur.rds")
h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene), breaks = 20, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)

d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))

title(paste0("nGene distribtusion for Glia_1(red), Glia_2(green), Glia_3(blue)"))

lines(x = d1$x, y=d1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$nGene))* diff(h1$breaks)[1],
      col="red1",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2"))$nGene))* diff(h2$breaks)[1],
      col="green3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_3"))$nGene))* diff(h3$breaks)[1],
      col="blue3",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1","Glia_2","Glia_3"),lty = 1,lwd=2,
       col=c("red","green","blue"))

h <- hist(as.numeric(ss2_glia.seur@meta.data$nGene), breaks = 30, xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = T)
h1.1 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene), breaks = 30, 
             xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h1.0 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene), breaks = 30, 
             xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h2 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene), breaks = 30, 
           xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)
h3 <- hist(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene), breaks = 20, 
           xlab="", main="", xlim = c(0,8000), ylim = c(0,400), plot = F)

d <- density(as.numeric(ss2_glia.seur@meta.data$nGene))
d1.1 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene))
d1.0 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene))
d2 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene))
d3 <- density(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene))

title(paste0("nGene distribtusion for \nGlia_1.1(red), Glia_1.0(green), Glia_2(blue), Glia_3(purple)"))

lines(x = d1.1$x, y=d1.1$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.1"))$nGene))* diff(h1.1$breaks)[1],
      col="#F8766D",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d1.0$x, y=d1.0$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_1.0"))$nGene))* diff(h1.0$breaks)[1],
      col="#7CAE00",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d2$x, y=d2$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_2"))$nGene))* diff(h2$breaks)[1],
      col="#00BFC4",lwd=3, xlim = c(0,8000), ylim = c(0,400))
lines(x = d3$x, y=d3$y* length(as.numeric((ss2_glia.seur@meta.data %>% filter(K300 == "Glia_3"))$nGene))* diff(h3$breaks)[1],
      col="#C77CFF",lwd=3, xlim = c(0,8000), ylim = c(0,400))
legend(x=0,y=400,c("Glia_1.1","Glia_1.0","Glia_2","Glia_3"),lty = 1,lwd=2,
       col=c("#F8766D","#7CAE00","#00BFC4","#C77CFF"))

well not a very clean separation on nGene level, could see green(Glia_1.0) with a peak < 4000

DimPlot(ss2_glia.seur, group.by = 'K300', order = rev(c("Glia_1.1","Glia_1.0","Glia_2","Glia_3"))) + 
  coord_fixed(0.88)  + labs(title="inlocal new")

differential analysis

test with MAST, fit a hurdle model,
for mouse colon atlas, fit Yi with covariate:

Yi : standardzed log2(TP10K+1) expression vector for gene i across cells
X : variable reflecting cell subset membership (e.g. PSNs versus non-PSNs)
A : age associated with each cell (aged versus adult)
C : circadian phase for each cell (evening versus morning)
L : location for each cell (distal versus proximal)
S : sex for each cell (male versus female)
T : mouse model for each cell (Uchl1 versus Sox10)
N: scaled number of genes for each cell (i.e., cell complexity)

in paper, comparisons of different conditions on ss2_glia are not mentioned much,
but they are tested on ss2_neur, so here just do it the same way

to find DEGs between different conditions, used to add covariates, but got quite different results comparing to inpaper's
(ss2_neur, similar - different: Segment > Time > Age)
here just without other covariates except nGene

to find DEGs between sub-clusters, add covariates which would have effect on the comparison to do a regression to decrease the putative effects,
though in the end it turns out that nGene level might match the clusterings anyway,
still doubtful about if nGene distribution is the reason or result or sth. else but somehow related ~

to filter for final unique DEGs between sub-clusters, inpaper calculates the next highest values for each highly-regulated gene
but the results are confused and lack exact cutoffs of parameters to get a very close DEG list which is always a tricky process
maybe it's because DEGs between Glia_2/3 are not that unique, and no previous biological knowledge as standard
so no more discussion, except to set a next highest group to make the result look more confident, maybe

add one more contrast between ss2_Glia_1 and ss2_Glia_2&3 !
add condition contrasts on major group !

(ss2_glia no 'Model' contrast)

# build covariates dataframe
#     
ss2_glia.cov <- data.frame(inlocal = factor(as.character(ss2_glia.seur@meta.data$K300)),
    inpaper = factor(as.character(ss2_glia.seur@meta.data$Annotation)),
    Age = factor(as.character(ss2_glia.seur@meta.data$Age_Processed), levels = c("Old","Young")),
    Time = as.character(ss2_glia.seur@meta.data$Time_Processed),
    Segment = factor(as.character(ss2_glia.seur@meta.data$Segment_Processed), levels = c("Distal","Proximal")),
    Seg_raw = as.character(ss2_glia.seur@meta.data$Segment),
    Sex = factor(as.character(ss2_glia.seur@meta.data$Sex_Processed), levels = c("M","F")),
    Model = factor(as.character(ss2_glia.seur@meta.data$Model_Processed), levels = c("Uchl1","Sox10")),
    nGene = scale(as.numeric(ss2_glia.seur@meta.data$nGene)),
    Sample = factor(rownames(ss2_glia.seur@meta.data)))
ss2_glia.cov$Time[ss2_glia.cov$Time=="7AM"] <- "Morning"
ss2_glia.cov$Time[ss2_glia.cov$Time=="7PM"] <- "Evening"
ss2_glia.cov$Time <- factor(ss2_glia.cov$Time, levels = c("Evening","Morning"))
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="1"] <- "Seg1"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="2"] <- "Seg2"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="3"] <- "Seg3"
ss2_glia.cov$Seg_raw[ss2_glia.cov$Seg_raw=="4"] <- "Seg4"
ss2_glia.cov$Seg_raw <- factor(ss2_glia.cov$Seg_raw, levels = c("Seg1","Seg2","Seg3","Seg4"))

rownames(ss2_glia.cov) <- as.character(ss2_glia.cov$Sample)

ss2_glia.cov$major <- as.character(ss2_glia.seur@meta.data$inlocal)
ss2_glia.cov$major[grep("Glia_1",ss2_glia.cov$major)] <- "Glia_1"
ss2_glia.cov$major[grep("Glia_2",ss2_glia.cov$major)] <- "Glia_23"
ss2_glia.cov$major[grep("Glia_3",ss2_glia.cov$major)] <- "Glia_23"
ss2_glia.cov$major <- factor(ss2_glia.cov$major)
 
#ss2_glia.cov

differential analysis with MAST

# inlocal
#ss2_glia.masts_inlocal <- sapply(levels(factor(ss2_glia.cov$inlocal)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$inlocal == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Age","Time","Segment","Sex","Model","Sample")], 
#                 formula='~ nGene + ident + Age + Time + Segment + Sex + Model', 
#                lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal <- do.call(rbind,ss2_glia.masts_inlocal)

# major
#ss2_glia.masts_inlocal.major <- sapply(levels(factor(ss2_glia.cov$major)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$major == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Age","Time","Segment","Sex","Model","Sample")], 
#                 formula='~ nGene + ident + Age + Time + Segment + Sex + Model', 
#                lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.major <- do.call(rbind,ss2_glia.masts_inlocal.major)

# conditions
# Age
#ss2_glia.masts_inlocal.Age <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age <- do.call(rbind,ss2_glia.masts_inlocal.Age)

# Age_Glia1
#ss2_glia.masts_inlocal.Age_Glia1 <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age)) & (ss2_glia.cov$major == "Glia_1")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Age_Glia1)

# Age_Glia23
#ss2_glia.masts_inlocal.Age_Glia23 <- sapply(levels(factor(ss2_glia.cov$Age)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Age == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Age)) & (ss2_glia.cov$major == "Glia_23")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Age_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Age_Glia23)

# Time
#ss2_glia.masts_inlocal.Time <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time <- do.call(rbind,ss2_glia.masts_inlocal.Time)

# Time_Glia1
#ss2_glia.masts_inlocal.Time_Glia1 <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time)) & (ss2_glia.cov$major == "Glia_1")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Time_Glia1)

# Time_Glia23
#ss2_glia.masts_inlocal.Time_Glia23 <- sapply(levels(factor(ss2_glia.cov$Time)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Time == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Time)) & (ss2_glia.cov$major == "Glia_23")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Time_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Time_Glia23)



# Segment
#ss2_glia.masts_inlocal.Segment <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment <- do.call(rbind,ss2_glia.masts_inlocal.Segment)

# Segment_Glia1
#ss2_glia.masts_inlocal.Segment_Glia1 <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment)) & (ss2_glia.cov$major == "Glia_1")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Segment_Glia1)

# Segment_Glia23
#ss2_glia.masts_inlocal.Segment_Glia23 <- sapply(levels(factor(ss2_glia.cov$Segment)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Segment == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Segment)) & (ss2_glia.cov$major == "Glia_23")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Segment_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Segment_Glia23)


# Seg_raw
#ss2_glia.masts_inlocal.Seg_raw <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw)

# Seg_raw_Glia1
#ss2_glia.masts_inlocal.Seg_raw_Glia1 <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw)) & (ss2_glia.cov$major == "Glia_1")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw_Glia1)

# Seg_raw_Glia23
#ss2_glia.masts_inlocal.Seg_raw_Glia23 <- sapply(levels(factor(ss2_glia.cov$Seg_raw)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Seg_raw == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Seg_raw)) & (ss2_glia.cov$major == "Glia_23")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Seg_raw_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Seg_raw_Glia23)


# Sex
#ss2_glia.masts_inlocal.Sex <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex <- do.call(rbind,ss2_glia.masts_inlocal.Sex)

# Sex_Glia1
#ss2_glia.masts_inlocal.Sex_Glia1 <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex)) & (ss2_glia.cov$major == "Glia_1")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex_Glia1 <- do.call(rbind,ss2_glia.masts_inlocal.Sex_Glia1)

# Sex_Glia23
#ss2_glia.masts_inlocal.Sex_Glia23 <- sapply(levels(factor(ss2_glia.cov$Sex)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Sex == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Sex)) & (ss2_glia.cov$major == "Glia_23")]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Sex_Glia23 <- do.call(rbind,ss2_glia.masts_inlocal.Sex_Glia23)


# Model
#ss2_glia.masts_inlocal.Model <- sapply(levels(factor(ss2_glia.cov$Model)), function(ident){
#  ident.use = factor(ifelse(ss2_glia.cov$Model == ident, ident, 'Other'), levels=c('Other', ident))
#  cells.use = ss2_glia.cov$Sample[!is.na(as.character(ss2_glia.cov$Model))]
#  p.find_markers(seur=ss2_glia.seur, 
#                 ident.1 = ident, 
#                 ident.use=ident.use, 
#                 cells.use=cells.use,
#                 min_alph=.01, 
#                 min_fc=1.2, 
#                 max_cells=2500,
#                 covariates=ss2_glia.cov[,c("nGene","Sample")], 
#                 formula='~ nGene + ident', 
#                 lrt_regex=ident,n.cores=8)
#}, simplify=F)
#ss2_glia.masts_inlocal.Model <- do.call(rbind,ss2_glia.masts_inlocal.Model)

## save raw output
#write.table(protectgene(ss2_glia.masts_inlocal),"MAST_raw/ss2_glia.masts_inlocal.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major),"MAST_raw/ss2_glia.masts_inlocal.major.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Age),"MAST_raw/ss2_glia.masts_inlocal.Age.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Age_Glia1.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Age_Glia23.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Segment),"MAST_raw/ss2_glia.masts_inlocal.Segment.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Segment_Glia1.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Segment_Glia23.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia1.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia23.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Sex),"MAST_raw/ss2_glia.masts_inlocal.Sex.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Sex_Glia1.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Sex_Glia23.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Time),"MAST_raw/ss2_glia.masts_inlocal.Time.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1),"MAST_raw/ss2_glia.masts_inlocal.Time_Glia1.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23),"MAST_raw/ss2_glia.masts_inlocal.Time_Glia23.raw.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

this part would take much time, annotate the code when it's done.

ss2_glia.masts_inlocal <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.raw.csv",
                                             header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.major <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.major.raw.csv",
                                                   header = TRUE, stringsAsFactors = FALSE, sep = ","))

ss2_glia.masts_inlocal.Age <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Age_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age_Glia1.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Age_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Age_Glia23.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))

ss2_glia.masts_inlocal.Segment <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment.raw.csv",
                                                     header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Segment_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment_Glia1.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Segment_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Segment_Glia23.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))

ss2_glia.masts_inlocal.Seg_raw <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw.raw.csv",
                                                     header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Seg_raw_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia1.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Seg_raw_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Seg_raw_Glia23.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))

ss2_glia.masts_inlocal.Sex <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Sex_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex_Glia1.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Sex_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Sex_Glia23.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))

ss2_glia.masts_inlocal.Time <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time.raw.csv",
                                                  header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Time_Glia1 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time_Glia1.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))
ss2_glia.masts_inlocal.Time_Glia23 <- recgene(read.table("MAST_raw/ss2_glia.masts_inlocal.Time_Glia23.raw.csv",
                                                 header = TRUE, stringsAsFactors = FALSE, sep = ","))

filtering and figures

there're a few columns to filter for the final unique DEGs,
for ss2 data, here ignores contam.res (contaminated residuals for droplet-based filtering)
and would only filter with p.adjD<0.05 (coefD > 0 and sorted) to save DEGs' table,
(keep duplicated DEGs only in one group with biggest coefD instead of using 'next highest' things)

if filter others like mean expression (mu, mean), expressed cells ratio(alpha) and log2FC et al. just for repeating figures in paper..

filt1

filt1_mast <- function(mast_raw){
  mast_filt <- mast_raw %>% filter(padjD<0.05) %>% filter(coefD >0) %>% arrange(desc(coefD))
  mast_filt <- mast_filt[!duplicated(mast_filt$gene),]
  mast_filt <- mast_filt %>% arrange(contrast,desc(coefD))
  return(mast_filt)
}
ss2_glia.masts_inlocal.filt1 <- filt1_mast(ss2_glia.masts_inlocal)
ss2_glia.masts_inlocal.major.filt1 <- filt1_mast(ss2_glia.masts_inlocal.major)

ss2_glia.masts_inlocal.Age.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age)
ss2_glia.masts_inlocal.Age_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age_Glia1)
ss2_glia.masts_inlocal.Age_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Age_Glia23)

ss2_glia.masts_inlocal.Segment.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment)
ss2_glia.masts_inlocal.Segment_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment_Glia1)
ss2_glia.masts_inlocal.Segment_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Segment_Glia23)

ss2_glia.masts_inlocal.Seg_raw.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw)
ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw_Glia1)
ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Seg_raw_Glia23)

ss2_glia.masts_inlocal.Sex.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex)
ss2_glia.masts_inlocal.Sex_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex_Glia1)
ss2_glia.masts_inlocal.Sex_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Sex_Glia23)

ss2_glia.masts_inlocal.Time.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time)
ss2_glia.masts_inlocal.Time_Glia1.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time_Glia1)
ss2_glia.masts_inlocal.Time_Glia23.filt1 <- filt1_mast(ss2_glia.masts_inlocal.Time_Glia23)
#write.table(protectgene(ss2_glia.masts_inlocal.filt1),"MAST_filt1/ss2_glia.masts_inlocal.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major.filt1),"MAST_filt1/ss2_glia.masts_inlocal.major.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Age.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age_Glia1.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Age_Glia23.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Segment.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment_Glia1.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Segment_Glia23.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Sex.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex_Glia1.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Sex_Glia23.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Time.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time_Glia1.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23.filt1),"MAST_filt1/ss2_glia.masts_inlocal.Time_Glia23.filt1.csv", 
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

several columns in MAST results could be candidate cutoffs to filter for 'significant' DEGs

Column Description
ident Cell subset or lineage
gene Gene
coefD Coefficient (discrete term)
pvalD P-value (discrete term)
padjD Adjusted p-value (discrete term)
coefC Coefficient (continuous term)
pvalC P-value (continuous term)
padjC Adjusted p-value (continuous term)
mastfc Fold change estimated by MAST
alpha Fraction of expressing cells
ref_alpha Fraction of expressing cells (reference group)
mu Mean log2(TP10K) in expressing cells
ref_mu Mean log2(TP10K) in expressing cells (reference group)
mean Mean log2(TP10K) in all cells
ref_mean Mean log2(TP10K) in all cells (reference group)
log2fc log2 fold change (relative to reference group)
contam.res Residual (contamination model)
Note: reference group = all other cells (for example, reference group for Tregs = all non-Tregs)

in paper, comparing to ss2_neur, analysis of ss2_glia is very absent.
maybe it is because glia can not be the key point considering the poor number of subclusters.
(is it born this way ? Or, showed like this for some quality/technique limitations ?)

besides, the highlighted markers: Gfra2, Slc18a2, Ntsr1, don't look good from the start.

FeaturePlot(ss2_glia.seur,c("Gfra2","Slc18a2","Ntsr1"))

multiplot(
easy_violin(seur=ss2_glia.seur,
            genes="Gfra2", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,6),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Slc18a2", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,5),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Ntsr1", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,3),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
cols = 1)

check labelings in fig.S2E

tested cutoffs using coefD + padjD + additional "next highest" fold change, can't directly get these genes.
for Glia 1, some labelings might could be fine.(like Col5a3, Kcna2, Cadm2, but where's Gfra2 ?)

fig.S2E, Glia 1 labeled genes.

FeaturePlot(ss2_glia.seur,c("Col5a3","Kcna2","Frmd4a","Abca8b","Clvs1","Prnp","Cadm2","Kcnn2",
                            "Aatk","Olfm2","Fyn","Ntng1","Apba2","Nav1","Kcnk13","Zfyve28"))

for Glia 2/3, hard to say...

ss2_glia clusters.

multiplot(
  DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
  DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
  DimPlot(ss2_glia.seur, group.by = 'K300', label = T) + labs(title = "ss2_glia inlocal new"),
  cols=3
)

fig.S2E, Glia 2 labeled genes.

FeaturePlot(ss2_glia.seur,c("Slc18a2","Mapk10","Fam184b","Trim9","Lsamp","Erc2","Auts2","Cpe",
                            "Sobp","Dio2","Ank2","Ppp2r2c","Kcnc4","Kcnab2","Ctnnd2"))

fig.S2E, Glia 3 labeled genes.
(Dkk3/Olfml3/Clu/Ncan/Omg/Ntsr1 look like that they have some pattern)

FeaturePlot(ss2_glia.seur,c("Bcan","Dkk3","Tspan18","Olfml3","Fam184b","Lsamp","Clu","Trim9",
                            "Mest","Ncan","Omg","Ntsr1","Auts2","Cdh2"))

multiplot(
easy_violin(seur=ss2_glia.seur,
            genes="Dkk3", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,5),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Olfml3", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,5),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Clu", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Ncan", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Omg", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Ntsr1", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
cols = 1)

filt2

tested "mu","mean","foldchange","n","alpha", still confused about how to exactly do it.
according to the nGene distribution upstairs,
may think "n" or "alpha" which represents "expressed cells (ratio)" could have more weights than only using expressing levels.
finally just use quantile 95% of "diff_alpha = alpha - ref_alpha" as cutoff.

# min - max of diff_alpha  
range(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha)
## [1] -0.1274755  0.4270345
# quantile with step 0.05
test_quant <- data.frame(quantile(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant) <- "alpha-ref_alpha"
test_quant
##      alpha-ref_alpha
## 0%      -0.127475491
## 5%      -0.052852965
## 10%     -0.032787456
## 15%     -0.015386494
## 20%      0.001583513
## 25%      0.012554546
## 30%      0.021046425
## 35%      0.031396877
## 40%      0.043049713
## 45%      0.053028822
## 50%      0.062352190
## 55%      0.073282255
## 60%      0.083200957
## 65%      0.092046598
## 70%      0.101332530
## 75%      0.113223128
## 80%      0.123605688
## 85%      0.135671889
## 90%      0.152707289
## 95%      0.182411398
## 100%     0.427034513

choose 95% as cutoff

par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.filt1$alpha - ss2_glia.masts_inlocal.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant[,1]+0.01*test_quant[,1]/abs(test_quant[,1]),round(test_quant[,1],3),cex=0.8)

filt2

modification from 'future check of ss2_neur':
if one gene expressed in most cells, or with similar ratio in all groups,
but uniquely high in one(or two), this 'alpha - ref_alpha' filtering couldn't find them.

so filt2 add condition:
or "genes with alpha > 'quantile 20%' and 'mu - ref_mu' > 'quantile 95%'"

filt2_mast <- function(mast_raw){
  mast_filt <- mast_raw %>% filter(padjD<0.05) %>% filter(coefD >0) %>% arrange(desc(coefD))
  mast_filt <- mast_filt[!duplicated(mast_filt$gene),]
  mast_filt <- mast_filt %>% arrange(contrast,desc(coefD))
  
  mast_filt$diff_alpha <- mast_filt$alpha - mast_filt$ref_alpha
  mast_filt$diff_mu <- mast_filt$mu - mast_filt$ref_mu 
  
  mast_filt <- mast_filt %>% filter(diff_alpha> as.numeric((quantile(mast_filt$diff_alpha,
                                                                     probs = seq(0,1,0.05)))["95%"]) |
                                      (diff_mu > as.numeric((quantile(mast_filt$diff_mu,
                                                                      probs = seq(0,1,0.05)))["95%"]) &
                                         alpha > as.numeric((quantile(mast_filt$alpha,
                                                                      probs = seq(0,1,0.05)))["20%"])))
  return(mast_filt)
}
ss2_glia.masts_inlocal.filt2 <- filt2_mast(ss2_glia.masts_inlocal)
ss2_glia.masts_inlocal.major.filt2 <- filt2_mast(ss2_glia.masts_inlocal.major)

ss2_glia.masts_inlocal.Age.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age)
ss2_glia.masts_inlocal.Age_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age_Glia1)
ss2_glia.masts_inlocal.Age_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Age_Glia23)

ss2_glia.masts_inlocal.Segment.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment)
ss2_glia.masts_inlocal.Segment_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment_Glia1)
ss2_glia.masts_inlocal.Segment_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Segment_Glia23)

ss2_glia.masts_inlocal.Seg_raw.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw)
ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw_Glia1)
ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Seg_raw_Glia23)

ss2_glia.masts_inlocal.Sex.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex)
ss2_glia.masts_inlocal.Sex_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex_Glia1)
ss2_glia.masts_inlocal.Sex_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Sex_Glia23)

ss2_glia.masts_inlocal.Time.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time)
ss2_glia.masts_inlocal.Time_Glia1.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time_Glia1)
ss2_glia.masts_inlocal.Time_Glia23.filt2 <- filt2_mast(ss2_glia.masts_inlocal.Time_Glia23)
#write.table(protectgene(ss2_glia.masts_inlocal.filt2),"MAST_filt2/ss2_glia.masts_inlocal.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.major.filt2),"MAST_filt2/ss2_glia.masts_inlocal.major.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Age.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age_Glia1.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Age_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Age_Glia23.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Segment.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment_Glia1.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Segment_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Segment_Glia23.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw_Glia1.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Seg_raw_Glia23.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Sex.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex_Glia1.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Sex_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Sex_Glia23.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

#write.table(protectgene(ss2_glia.masts_inlocal.Time.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia1.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time_Glia1.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")
#write.table(protectgene(ss2_glia.masts_inlocal.Time_Glia23.filt2),"MAST_filt2/ss2_glia.masts_inlocal.Time_Glia23.filt2.csv",
#            col.names = TRUE, row.names = FALSE, quote = FALSE, sep = ",")

how many DEGs in .filt2

dim(ss2_glia.masts_inlocal.filt2)[1]
## [1] 285
ss2_glia.masts_inlocal.filt2[1:5,]
##     gene      contrast    coefD        pvalD     coefC        pvalC    mastfc
## 1   Insc identGlia_1.0 1.476740 4.673997e-49 0.5382472 1.112119e-10 0.6157598
## 2 Col5a3 identGlia_1.0 1.314662 2.607490e-36 0.4858169 5.523741e-12 0.7822629
## 3   Lcp2 identGlia_1.0 1.177220 1.264616e-26 0.2781701 6.778089e-03 0.1222934
## 4  Cspg4 identGlia_1.0 1.163933 1.941090e-28 0.1332955 1.711788e-01 0.4288304
## 5 Igsf21 identGlia_1.0 1.121864 4.956467e-25 0.5641030 1.640965e-14 0.4847441
##          pvalH        padjD        padjC        padjH       ref   n ref_n
## 1 7.968775e-57 2.101429e-45 1.063848e-08 8.956903e-54 Glia_1.0- 392   812
## 2 2.014134e-45 5.861637e-33 6.898538e-10 8.232316e-43 Glia_1.0- 424  1111
## 3 4.369558e-27 1.137143e-23 8.764858e-02 8.541536e-25 Glia_1.0- 208   353
## 4 1.063528e-27 2.181786e-25 4.535179e-01 2.173465e-25 Glia_1.0- 229   416
## 5 1.039548e-36 3.714046e-22 2.544062e-12 3.338436e-34 Glia_1.0- 439  1339
##       alpha ref_alpha       mu   ref_mu      mean   ref_mean     total
## 1 0.7153285 0.3533507 2.404424 1.945548 1.9211019  0.4447211 0.3988714
## 2 0.7737226 0.4834639 2.486427 2.094925 2.1163156  1.0464054 0.3336074
## 3 0.3795620 0.1536118 1.566902 1.247483 0.1693096 -1.4551561 0.4237189
## 4 0.4178832 0.1810270 1.704606 1.704008 0.4457773 -0.7617153 0.3551336
## 5 0.8010949 0.5826806 2.311550 1.554877 1.9915947  0.7756543 0.3564757
##   ref_total   log2fc    ident diff_alpha      diff_mu
## 1 0.6011286 1.476381 Glia_1.0  0.3619777 0.4588758345
## 2 0.6663926 1.069910 Glia_1.0  0.2902587 0.3915018136
## 3 0.5762811 1.624466 Glia_1.0  0.2259502 0.3194193922
## 4 0.6448664 1.207493 Glia_1.0  0.2368562 0.0005976068
## 5 0.6435243 1.215940 Glia_1.0  0.2184143 0.7566724910

volcano plots similar to fig.S2E

plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.0"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.0"))$padjD)),
     pch=16,col="grey",
     main="Glia_1.0", xlab="DE coefficient (Glia_1.0 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.0"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.0"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$coefD+0.05,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$padjD)+1),
     (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0"))$gene,cex=0.6)

legend(x=0,y=45,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_1.1"))$padjD)),
     pch=16,col="grey",
     main="Glia_1.1", xlab="DE coefficient (Glia_1.1 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_1.1"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$coefD+0.05,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$padjD)+1),
     (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1"))$gene,cex=0.6)

legend(x=0,y=56,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_2"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_2"))$padjD)),
     pch=16,col="grey",
     main="Glia_2", xlab="DE coefficient (Glia_2 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.03,y=1.8,"padjD < 0.05",cex=0.75)

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_2"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_2"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$coefD+0.05,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$padjD)+0.5),
     (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2"))$gene,cex=0.6)

legend(x=0,y=25,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

plot(data.frame(coefD=(ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_3"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal %>% filter(coefD > 0 & contrast=="identGlia_3"))$padjD)),
     pch=16,col="grey",
     main="Glia_3", xlab="DE coefficient (Glia_3 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.12,y=2.5,"padjD < 0.05",cex=0.75)

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_3"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt1 %>% filter(contrast=="identGlia_3"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$coefD+0.05,
                logP=-log10((ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$padjD)+1),
     (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3"))$gene,cex=0.6)

legend(x=0,y=52,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

as displayed upstairs, filt2 filtering for 'diff_alpha' basically get genes with top '-log10(padjD)' which would make sense logically.
however, if it's 'really significant' ? I have no idea. could only say 'putative' or 'candidate'. next check these DEGs.

DEGs of subclusters

clusters

multiplot(
  DimPlot(ss2_glia.seur, group.by = 'Annotation', label = T) + labs(title = "ss2_glia inpaper"),
  DimPlot(ss2_glia.seur, group.by = 'inlocal', label = T) + labs(title = "ss2_glia inlocal"),
  DimPlot(ss2_glia.seur, group.by = 'K300', label = T) + labs(title = "ss2_glia inlocal new"),
  cols=3
)

Glia_1.1 DEGs

left top group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_1.1", cols = "#7CAE00",
        pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1") %>% arrange(padjD))$gene[1:16])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.1") %>% arrange(padjD))$gene[17:32])

Glia_1.0 DEGs

left down group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_1.0", cols = "#F8766D", 
        pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_1.0") %>% arrange(padjD))$gene[1:16])

Glia_2 DEGs

right outside group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_2", cols = "#00BFC4", pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[1:16])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[17:32])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[33:48])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_2") %>% arrange(padjD))$gene[49:64],ncol = 4)

Glia_3 DEGs

right inside group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = ss2_glia.seur$K300=="Glia_3", cols = "#C77CFF", 
        pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[1:16])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[17:32])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[33:48])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[49:64])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[65:80])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[81:96])

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt2 %>% filter(contrast=="identGlia_3") %>% arrange(padjD))$gene[97:112])

conclusion of ss2_glia DEGs

comparing to ss2_neur, in spite of similar cell number, not many DEGs could be highly unique.
it could be possible here glia in ENS have dynamic stages rather than distant subgroups.

these filt2 DEGs just show genes that might be more significant than other unlabeled,
but if there's one gene with alpha=15% and ref_alpha=0 which is more like a unique identifier, it wouldn't be here,
test that situation:

quantile(ss2_glia.masts_inlocal.filt1$ref_alpha, probs = seq(0,1,0.05))
##           0%           5%          10%          15%          20%          25% 
## 0.0004191115 0.0291829760 0.0591208726 0.0854779492 0.1131015404 0.1369938686 
##          30%          35%          40%          45%          50%          55% 
## 0.1594912000 0.1917808000 0.2251219929 0.2592955000 0.2931756602 0.3359116000 
##          60%          65%          70%          75%          80%          85% 
## 0.3746736292 0.4182505673 0.4702905994 0.5263103280 0.5891329584 0.6595216693 
##          90%          95%         100% 
## 0.7386141535 0.8518021794 0.9974853311

choose 'ref_alpha=0.06' < 'quantile 10%' as cutoff, all genes with '0.06 < alpha - ref_alpha < 0.182':
('>0.182' already found)

(ss2_glia.masts_inlocal.filt1 %>% filter(ref_alpha<0.06 & ((alpha - 0.06) > ref_alpha) & (alpha - ref_alpha)<0.182)
 )[,c("gene","contrast","coefD","n","alpha","ref_alpha","mu","ref_mu")]
##             gene      contrast     coefD   n      alpha  ref_alpha         mu
## 1           Artn identGlia_1.0 1.7443915  57 0.10401460 0.02132289  1.5778663
## 2         Emx2os identGlia_1.0 0.9296810  72 0.13138686 0.05961706 -3.8570291
## 3       Cdc42ep3 identGlia_1.0 0.8560334  71 0.12956204 0.05918190  1.4756956
## 4         Cxcl12 identGlia_1.1 1.8005549  52 0.11304348 0.02095557  2.9995314
## 5       Marveld1 identGlia_1.1 1.7675631  51 0.11086957 0.02430847  1.9330923
## 6            Mal identGlia_1.1 1.7492440  83 0.18043478 0.03939648  1.9919836
## 7         Prss23 identGlia_1.1 1.6120380  64 0.13913043 0.03310981  2.8597595
## 8          Flrt3 identGlia_1.1 1.5226250  42 0.09130435 0.02849958  1.5014182
## 9         Apcdd1 identGlia_1.1 1.4689436  52 0.11304348 0.03227158  1.9511701
## 10       Aldh1a1 identGlia_1.1 1.4623211  64 0.13913043 0.05196982  1.7287784
## 11       Bhlhe40 identGlia_1.1 1.4100360  89 0.19347826 0.05867561  2.3615038
## 12         Kcnc4   identGlia_2 0.8833450 134 0.12934363 0.04143646  0.6335837
## 13        Scube3   identGlia_2 0.6773048 129 0.12451737 0.05303867 -0.1776031
## 14         P2rx6   identGlia_2 0.5770731 126 0.12162162 0.05801105  1.0917545
## 15          Dio2   identGlia_2 0.4275244 159 0.15347490 0.05966851  0.9981773
## 16        Gm6568   identGlia_3 1.0738720 108 0.13466330 0.04207436 -1.8670250
## 17        Pcdhb9   identGlia_3 1.0718440 122 0.15211970 0.05724070  2.4847540
## 18          Otor   identGlia_3 1.0037820 140 0.17456360 0.05283757  2.4753690
## 19           Omg   identGlia_3 0.9251344 122 0.15211970 0.05234834  1.4002720
## 20         Smpd1   identGlia_3 0.9187469 100 0.12468830 0.04990215  1.5836070
## 21 6330403K07Rik   identGlia_3 0.6456049  89 0.11097260 0.04647750  1.4924780
## 22         Wnt11   identGlia_3 0.6386678  97 0.12094760 0.05675147  0.8139619
##        ref_mu
## 1   1.4372609
## 2  -3.5065562
## 3   1.6565969
## 4   1.4024947
## 5   1.3296913
## 6   1.1450324
## 7   1.7718143
## 8   0.8600446
## 9   0.1580686
## 10 -0.4238265
## 11  1.3402953
## 12 -0.3412070
## 13  0.2276300
## 14  0.6835485
## 15  0.9536753
## 16 -1.3687070
## 17  2.2399820
## 18  1.4570760
## 19  0.8250343
## 20  1.7753680
## 21  1.2886020
## 22  0.7761727

could see Glia_1.0 finally gets a relatively better one, 'Artn',

and Glia_1.1 gets a familiar gene, 'Aldh1a1', which is investigated by Christian Wolfrum Lab in
"snRNA-seq reveals a subpopulation of adipocytes that regulates thermogenesis",
this paper was published less than two months later

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.filt1 %>% filter(ref_alpha<0.06 & ((alpha - 0.06) > ref_alpha) & (alpha - ref_alpha)<0.182))[,c("gene","contrast","coefD","n","alpha","ref_alpha")]$gene)

multiplot(
easy_violin(seur=ss2_glia.seur,
            genes="Artn", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Aldh1a1", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
easy_violin(seur=ss2_glia.seur,
            genes="Omg", 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.seur@meta.data[,"K300"], 
            cells=rownames(ss2_glia.seur@meta.data), 
            do.facet=FALSE,
            facet_by = NULL, 
            ylim=c(0,4),
            color.fill = glia.colors.inlocal,
            color.point = glia.colorpoint.inlocal),
cols = 1)

Pseudotime

how about UMAP result comparing to tSNE ?
and what does the putative cell trajectory look like ?

#ss2_glia.seur_traj <- ss2_glia.seur
#ss2_glia.seur_traj <- RunUMAP(ss2_glia.seur_traj, dims=1:9)
#saveRDS(ss2_glia.seur_traj,"ss2_glia.seur_traj.rds")
ss2_glia.seur_traj <- readRDS("ss2_glia.seur_traj.rds")
multiplot(
DimPlot(ss2_glia.seur_traj,reduction = 'tsne', group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur_traj,reduction = 'umap', group.by = "K300",label = T) + labs(title = "ss2_glia inlocal new"),
cols = 1)

visually, UMAP result gives a clearer understanding of the difference between clusters.

DiffusionMap using DEGs or PCs

ref. https://broadinstitute.github.io/2020_scWorkshop/trajectory-inference.html

diffusion_test <- DiffusionMap(t(data.frame(ss2_glia.seur_traj@assays[['RNA']]@data))[,ss2_glia.masts_inlocal.filt2$gene])
diffusion_testpca <- DiffusionMap(data.frame(ss2_glia.seur_traj@reductions[['pca']]@cell.embeddings)[,1:9])
diffusion_tmp <- data.frame(DC1 = eigenvectors(diffusion_test)[,1],
                            DC2 = eigenvectors(diffusion_test)[,2],
                            label = ss2_glia.seur_traj$K300)
head(diffusion_tmp)
##                      DC1           DC2    label
## M1_S1.S364 -0.0170155406 -0.0040727896   Glia_3
## M1_S1.S291  0.0268621785 -0.0137390705 Glia_1.1
## M1_S1.S350  0.0395379687  0.0004830339 Glia_1.1
## M1_S1.S352 -0.0092287037  0.0228178496   Glia_2
## M1_S1.S328 -0.0134275963  0.0133168603   Glia_3
## M1_S1.S327  0.0001965577  0.0156179767   Glia_2
diffusion_pca <- data.frame(DC1 = eigenvectors(diffusion_testpca)[,1],
                            DC2 = eigenvectors(diffusion_testpca)[,2],
                            label = ss2_glia.seur_traj$K300)
head(diffusion_pca)
##                     DC1         DC2    label
## M1_S1.S364  0.015105606 -0.01506936   Glia_3
## M1_S1.S291 -0.025148810  0.01540450 Glia_1.1
## M1_S1.S350 -0.032110815 -0.01779140 Glia_1.1
## M1_S1.S352  0.015109798 -0.01287728   Glia_2
## M1_S1.S328  0.014539857 -0.01412120   Glia_3
## M1_S1.S327  0.008757453  0.01557597   Glia_2
multiplot(
ggplot(diffusion_tmp, aes(x = DC1, y = DC2, colour = label)) +
    geom_point(cex=1.0) + 
    scale_color_manual(values=c("#F8766D","#7CAE00","#00BFC4","#C77CFF")) +
    #ggthemes::scale_color_tableau() + 
    xlab("Diffusion component 1") + 
    ylab("Diffusion component 2") +
    theme_classic() + labs(title=("DM with DEGs")),
ggplot(diffusion_pca, aes(x = DC1, y = DC2, colour = label)) +
    geom_point(cex=1.0) + 
    scale_color_manual(values=c("#F8766D","#7CAE00","#00BFC4","#C77CFF")) +
    #ggthemes::scale_color_tableau() + 
    xlab("Diffusion component 1") + 
    ylab("Diffusion component 2") +
    theme_classic() + labs(title=("DM with PCs")),
cols = 1)

Slingshot pseudotime on UMAP

ref. https://bustools.github.io/BUS_notebooks_R/slingshot.html

sds_umap <- slingshot(Embeddings(ss2_glia.seur_traj, "umap"), clusterLabels = ss2_glia.seur_traj$K300)
## Using full covariance matrix
sds_umap
## class: SlingshotDataSet 
## 
##  Samples Dimensions
##     3039          2
## 
## lineages: 1 
## Lineage1: Glia_3  Glia_2  Glia_1.0  Glia_1.1  
## 
## curves: 1 
## Curve1: Length: 23.846   Samples: 3039
# as people described, to plot Slingshot result, may need to give each cell a color  
sds_colors <- ss2_glia.seur_traj$K300
clust_colors <- c("#7CAE00","#F8766D","#00BFC4","#C77CFF")
clust_names <- paste0("Glia_",c("1.1","1.0","2","3"))
for(i in 1:length(clust_colors)){
  sds_colors[sds_colors==clust_names[i]] <- clust_colors[i]
}
plot(reducedDims(sds_umap), col = sds_colors, pch=16, cex = 0.35, asp = 1.25,
     ylim=c(-5,6),main="Slingshot on UMAP")
lines(sds_umap, lwd=2, type = 'lineages', col = 'black')
legend(x=-12,y=6,clust_names,cex=0.65,
       col=clust_colors,pch=c(16,16,16))

seems the trajectory is like Glia_1.1 <--> Glia_1.0 <--> Glia_2 <--> Glia_3.

well, no direction yet,
here just take a look, all those methods are doing the same thing: Dimension Reduction,
which one is better has been discussed a lot,
just as https://broadinstitute.github.io/2020_scWorkshop/trajectory-inference.html,
those google slides make a good introduction.

additionally check pseudotime in monocle3.

Monocle3 Pseudotime

ref. https://cole-trapnell-lab.github.io/monocle3/docs/trajectories/#learn-graph

# perform seruat v3 object to monocle v3 trajectory analysis  
# ref. https://github.com/satijalab/seurat/issues/1658  
gene_annt <- as.data.frame(rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings),
                           row.names = rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings))
colnames(gene_annt) <- "gene_short_name"
# genes in @feature.loadings is the seurat used variable genes ! 
# so here just use those about 1500 features instead of whole >10k genes.  

cell_meta <- as.data.frame(ss2_glia.seur_traj@meta.data)

new_mtx <- ss2_glia.seur_traj@assays[['RNA']]@counts
new_mtx <- new_mtx[rownames(ss2_glia.seur_traj@reductions[['pca']]@feature.loadings),]

cds <- new_cell_data_set(new_mtx,
                         cell_metadata = cell_meta,
                         gene_metadata = gene_annt)

recreate.partition <- c(rep(1,length(cds@colData@rownames)))
#recreate.partition <- ss2_glia.seur_traj@meta.data$K300
names(recreate.partition) <- cds@colData@rownames
recreate.partition <- as.factor(recreate.partition)

cds@clusters@listData[['UMAP']][['partitions']] <- recreate.partition
#cds@clusters@listData[['tSNE']][['partitions']] <- recreate.partition

list_clusters <- ss2_glia.seur_traj@meta.data$K300
names(list_clusters) <- rownames(ss2_glia.seur_traj@meta.data)

cds@clusters@listData[['UMAP']][['clusters']] <- list_clusters
#cds@clusters@listData[['tSNE']][['clusters']] <- list_clusters

cds@clusters@listData[['UMAP']][['louvarin_res']] <- "NA"
#cds@clusters@listData[['tSNE']][['louvarin_res']] <- "NA"
cds@int_colData@listData[['reducedDims']][["UMAP"]] <- ss2_glia.seur_traj@reductions[['umap']]@cell.embeddings
#cds@int_colData@listData[['reducedDims']][["tSNE"]] <- ss2_glia.seur_traj@reductions[['tsne']]@cell.embeddings

cds@preprocess_aux$gene_loadings <- ss2_glia.seur_traj@reductions[["pca"]]@feature.loadings
# UMAP from Seurat v3 to Monocle v3 done.  

cds <- learn_graph(cds)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%

similar to DM, but more branches making it complex.

root_cells <- rownames(ss2_glia.seur_traj@meta.data %>% filter(K300 == "Glia_1.1") )[
  order((ss2_glia.seur_traj@meta.data %>% filter(K300 == "Glia_1.1"))$nGene)[1:10]]
cds <- order_cells(cds,root_cells = root_cells )

manually choose 10 cells from Glia_1.1 with the smallest nGene values as 'root_cells',
so that there are directions starting in color 0:'dark purple'.

DimPlot(ss2_glia.seur_traj, group.by = 'nGene', reduction = 'umap',cols = material.heat(1909)) + 
             labs(title = "nGene distribution")  + 
             guides(colour = guide_coloursteps(show.limits = TRUE,label=FALSE))

this pseudotime is highly related to nGene map,
maybe ss2 expression level normalized by 'CPM' is naturally with 'nGene pattern',
making this result nonsense. hope the following 10x data by 'UMI count' would provide support.

according to analysis above, can't help to think if there are only two major groups, next check 'Glia_1 vs Glia_2/3'

DEGs of 'Glia_1 vs Glia_2/3'

nuclei info

table(ss2_glia.cov$major)
## 
##  Glia_1 Glia_23 
##    1074    1965

volcano plots

# quantile with step 0.05
test_quant.major <- data.frame(quantile(ss2_glia.masts_inlocal.major.filt1$alpha - ss2_glia.masts_inlocal.major.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.major) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.major.filt1$alpha - ss2_glia.masts_inlocal.major.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.6))
text(x=0:20*1.2+0.65,y=test_quant.major[,1]+0.02*test_quant.major[,1]/abs(test_quant.major[,1]),round(test_quant.major[,1],3),cex=0.7)

plot(data.frame(coefD=(ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_1"))$padjD)),
     pch=16,col="grey",
     main="Glia_1", xlab="DE coefficient (Glia_1 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.05,y=2.5,"padjD < 0.05",cex=0.65)

points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_1"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$coefD+0.1,
                logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$padjD)+0.8),
     (ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene,cex=0.6)

legend(x=0,y=95,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

plot(data.frame(coefD=(ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_23"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major %>% filter(coefD > 0 & contrast=="identGlia_23"))$padjD)),
     pch=16,col="grey",
     main="Glia_23", xlab="DE coefficient (Glia_23 vs. others)", ylab="-log10(adjusted p-value)")
#abline(v=0.5,lty=2)
#text(x=0.7,y=45,"coefD > 0.5",cex=0.8)
abline(h=-log10(0.05),lty=2)
text(x=0.05,y=2.5,"padjD < 0.05",cex=0.65)

points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_23"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major.filt1 %>% filter(contrast=="identGlia_23"))$padjD)),
       pch=16,col="lightblue")

points(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$coefD,
                logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$padjD)),
       pch=16,col="darkred")
text(data.frame(coefD=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$coefD+0.06,
                logP=-log10((ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$padjD)+1.5),
     (ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene,cex=0.6)

legend(x=0,y=120,c("raw","filt1","filt2"),cex=1,
       col=c("grey","lightblue","darkred"),pch=c(16,16,16))

Glia_1 DEGs

left group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = grepl("Glia_1",ss2_glia.seur$K300), cols = c("#7CAE00","#F8766D"),
        pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

(sorted by coefD, left -> right, up -> down)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[1:83], ncol = 4)

(sorted by coefD, up -> down, left -> right)

multiplot(
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[1:20], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[21:40], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[41:60], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1"))$gene[61:80], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
cols = 4)

easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_1" ))$gene[81:83], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6))

Glia_23 DEGs

right group

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'K300',cells = grepl("Glia_2|Glia_3",ss2_glia.seur$K300), cols = c("#00BFC4","#C77CFF"),
        pt.size = 0.5,label = T) + 
  labs(title = "ss2_glia inlocal new") + coord_cartesian(xlim = c(-50,42), ylim = c(-40,35)),
cols = 1)

(sorted by coefD, left -> right, up -> down)

FeaturePlot(ss2_glia.seur,
            (ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene[1:165],ncol = 4)

(sorted by coefD, up -> down, left -> right)

multiplot(
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23" ))$gene[1:33], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[34:66], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[67:99], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[100:132], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
easy_violin(seur=ss2_glia.seur,
            genes=(ss2_glia.masts_inlocal.major.filt2 %>% filter(contrast=="identGlia_23"))$gene[133:165], 
            data=ss2_glia.seur@assays[['RNA']]@data, 
            groups=ss2_glia.cov$major, 
            cells=rownames(ss2_glia.seur@meta.data), 
            ylim=c(0,6)),
cols = 5)

DEGs of different conditions

Age

nuclei with Age info

table(ss2_glia.seur@meta.data$Age_Processed)
## 
##   Old Young 
##  1074  1964
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age.filt1$alpha - ss2_glia.masts_inlocal.Age.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age.filt1$alpha - ss2_glia.masts_inlocal.Age.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.02*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F) + labs(title = "ss2_glia Age"),

cols = 1)

Age DEGs

Old upregulated

Young upregulated
(ss2_glia.masts_inlocal.Age.filt2 %>% filter(contrast=="identYoung"))$gene
##  [1] "Srsf2"         "9230102K24Rik" "Mvp"           "Plac9b"       
##  [5] "Scn3a"         "Slc34a2"       "Tmem181c-ps"   "Spag5"        
##  [9] "Paqr6"         "Tmem254b"      "Igf2r"         "Utp11l"       
## [13] "Wdr6"          "Taf6"          "Dtx3"          "Cd320"        
## [17] "Srsf6"         "Zdhhc1"        "Dalrd3"        "Hsf5"         
## [21] "Ccdc88c"       "Gdi1"          "Nrbp2"         "Ubn1"         
## [25] "Iqcg"          "Meg3"          "Brd9"          "Col3a1"       
## [29] "1810046K07Rik" "2410018M08Rik" "Pnldc1"        "Dnaaf3"       
## [33] "Cep55"         "Tnni3"         "Ttll6"

Age_Glia1

nuclei with Age info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Age_Processed)
## 
##   Old Young 
##   404   670
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia1.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia1.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.02*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_1",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Age_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Age_Glia1 DEGs

Old upregulated

Young upregulated

Age_Glia23

nuclei with Age info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Age_Processed)
## 
##   Old Young 
##   670  1294
# quantile with step 0.05
test_quant.Age <- data.frame(quantile(ss2_glia.masts_inlocal.Age_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia23.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Age) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Age_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Age_Glia23.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.3))
text(x=0:20*1.2+0.65,y=test_quant.Age[,1]+0.01*test_quant.Age[,1]/abs(test_quant.Age[,1]),round(test_quant.Age[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Age_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Age_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Age_Glia23 DEGs

Old upregulated

Young upregulated

Model

should pass Model part, >99% nuclei from Sox10 mice

nuclei with Model info

table(ss2_glia.seur@meta.data$Model_Processed)
## 
## Sox10 Uchl1 
##  3018    15
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Model',label = F) + labs(title = "ss2_glia Model") +
  coord_fixed(ratio=0.94),
cols = 1)

Segment

nuclei with Segment info

table(ss2_glia.seur@meta.data$Segment_Processed)
## 
##   Distal Proximal 
##     1266     1585
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment.filt1$alpha - ss2_glia.masts_inlocal.Segment.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment.filt1$alpha - ss2_glia.masts_inlocal.Segment.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(-0.15,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F) + labs(title = "ss2_glia Segment"),
cols = 1)

Segment DEGs

Distal upregulated

Proximal upregulated

Segment_Glia1

nuclei with Segment info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Segment_Processed)
## 
##   Distal Proximal 
##      496      516
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia1.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia1.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_1",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Segment_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Segment_Glia1 DEGs

Distal upregulated

Proximal upregulated

Segment_Glia23

nuclei with Segment info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Segment_Processed)
## 
##   Distal Proximal 
##      770     1069
# quantile with step 0.05
test_quant.Segment <- data.frame(quantile(ss2_glia.masts_inlocal.Segment_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia23.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Segment) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Segment_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Segment_Glia23.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.6))
text(x=0:20*1.2+0.65,y=test_quant.Segment[,1]+0.02*test_quant.Segment[,1]/abs(test_quant.Segment[,1]),round(test_quant.Segment[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Segment_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Segment_Glia23 DEGs

Distal upregulated

Proximal upregulated

Seg_raw

nuclei with Seg_raw info ('All' excluded)

table(ss2_glia.seur@meta.data$Segment)
## 
##   1   2   3   4 All 
## 909 676 567 699 187

why calculate Seg1/2/3/4 separately while already have Distal/Proximal ?
used to try different parameters to repeat DEG results in paper,
for Segment part, still not sure how they did it, here just list results in both ways.

# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),round(test_quant.Seg_raw[,1],3),cex=0.7)

Seg_raw DEGs

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F) + labs(title = "ss2_glia Seg_raw") +
  coord_fixed(ratio=0.95),
cols = 1)

Seg3+4(Distal) upregulated

Seg1+2(Proximal) upregulated

Seg_raw_Glia1

nuclei with Seg_raw info ('All' excluded)

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Segment)
## 
##   1   2   3   4 All 
## 254 262 219 277  62
# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia1.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),
     round(test_quant.Seg_raw[,1],3),cex=0.7)

Seg_raw DEGs

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F, pt.size = 0.55,
        cells = grep("Glia_1",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Seg_raw_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)

Seg3+4(Distal) upregulated

Seg1+2(Proximal) upregulated

Seg_raw_Glia23

nuclei with Seg_raw info ('All' excluded)

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Segment)
## 
##   1   2   3   4 All 
## 655 414 348 422 125
# quantile with step 0.05
test_quant.Seg_raw <- data.frame(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Seg_raw) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Seg_raw_Glia23.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.55))
text(x=0:20*1.2+0.65,y=test_quant.Seg_raw[,1]+0.02*test_quant.Seg_raw[,1]/abs(test_quant.Seg_raw[,1]),
     round(test_quant.Seg_raw[,1],3),cex=0.7)

Seg_raw DEGs

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(rep("grey",2),glia.colors.inlocal[1:2])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Segment',label = F, pt.size = 0.55,
        cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Seg_raw_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),
cols = 1)

Seg3+4(Distal) upregulated

Seg1+2(Proximal) upregulated

Sex

nuclei with Sex info

table(ss2_glia.seur@meta.data$Sex_Processed)
## 
##    F    M 
## 1081 1957
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex.filt1$alpha - ss2_glia.masts_inlocal.Sex.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex.filt1$alpha - ss2_glia.masts_inlocal.Sex.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.02*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F) + labs(title = "ss2_glia Sex") +
  coord_fixed(ratio=0.97),
cols = 1)

Sex DEGs

Male upregulated

Sex_Glia1

nuclei with Sex info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Sex_Processed)
## 
##   F   M 
## 381 693
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia1.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia1.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.02*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_1",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Sex_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Sex_Glia1 DEGs

Male upregulated

Female upregulated

Sex_Glia23

nuclei with Sex info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Sex_Processed)
## 
##    F    M 
##  700 1264
# quantile with step 0.05
test_quant.Sex <- data.frame(quantile(ss2_glia.masts_inlocal.Sex_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia23.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Sex) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Sex_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Sex_Glia23.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,1.1))
text(x=0:20*1.2+0.65,y=test_quant.Sex[,1]+0.028*test_quant.Sex[,1]/abs(test_quant.Sex[,1]),round(test_quant.Sex[,1],3),cex=0.7)

here Two F specific genes strangely get too low padjD to be zero, so not plotted.

(ss2_glia.masts_inlocal.Sex_Glia23.filt2 %>% filter(contrast == "identF"))[,c("gene","contrast","coefD","padjD")]
##          gene contrast     coefD        padjD
## 1        Xist   identF 9.9678890 0.000000e+00
## 2        Tsix   identF 8.3296312 0.000000e+00
## 3      Zfp382   identF 1.0967016 1.420307e-25
## 4       P2rx1   identF 1.0133499 3.812840e-22
## 5 Tmem181b-ps   identF 0.9796084 1.376305e-19
multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Sex_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Sex_Glia2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Sex_Glia23 DEGs

Male upregulated

Female upregulated

Time

nuclei with Time info

table(ss2_glia.seur@meta.data$Time_Processed)
## 
##  7AM  7PM 
## 1492 1390
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time.filt1$alpha - ss2_glia.masts_inlocal.Time.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time.filt1$alpha - ss2_glia.masts_inlocal.Time.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.45))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.012*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F) + labs(title = "ss2_glia Time") +
  coord_fixed(ratio=0.95),
cols = 1)

Time DEGs

Evening upregulated

Morning upregulated

Time_Glia1

nuclei with Time info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_1"))$Time_Processed)
## 
## 7AM 7PM 
## 514 511
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia1.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time_Glia1.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia1.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.5))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.02*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(glia.colors.inlocal[1:2],rep("grey",2))) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_1",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Time_Glia1") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Time_Glia1 DEGs

Evening upregulated

Morning upregulated

Time_Glia23

nuclei with Time info

table((ss2_glia.seur@meta.data %>% filter(inlocal == "Glia_2" |inlocal == "Glia_3"))$Time_Processed)
## 
## 7AM 7PM 
## 978 879
# quantile with step 0.05
test_quant.Time <- data.frame(quantile(ss2_glia.masts_inlocal.Time_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia23.filt1$ref_alpha,
                                  probs = seq(0,1,0.05)))
colnames(test_quant.Time) <- "alpha-ref_alpha"
#test_quant

# choose 95% as cutoff
par(mar=c(3,2,3,2))
barplot(quantile(ss2_glia.masts_inlocal.Time_Glia23.filt1$alpha - ss2_glia.masts_inlocal.Time_Glia23.filt1$ref_alpha,
                 probs = seq(0,1,0.05)),col = c(rep("grey",19),"lightblue","grey"),
        main = "Quantile of 'alpha - ref_alpha'",ylim=c(0,0.4))
text(x=0:20*1.2+0.65,y=test_quant.Time[,1]+0.012*test_quant.Time[,1]/abs(test_quant.Time[,1]),round(test_quant.Time[,1],3),cex=0.7)

multiplot(
DimPlot(ss2_glia.seur, group.by = 'K300',label = T,
        cols = c(rep("grey",2),glia.colors.inlocal[3:4])) + labs(title = "ss2_glia inlocal new"),
DimPlot(ss2_glia.seur, group.by = 'Time_Processed',label = F, pt.size = 0.55,
        cells = grep("Glia_2|Glia_3",ss2_glia.cov$inlocal)) + 
  labs(title = "ss2_glia Time_Glia_2/3") + coord_fixed(xlim = c(-50,38), ylim = c(-40,35)),

cols = 1)

Time_Glia23 DEGs

Evening upregulated

Morning upregulated

ss2_glia part ends here.